Analysis

Count data

Data wrangling

EA sampling sites

# WD
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")

# Load data
counts <- read_csv("TraC_Fish_Counts_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")

# Join count data with site information
counts_clean <- counts %>% left_join(sites, by = "SITE_ID") %>%
  mutate(EVENT_DATE = ymd(EVENT_DATE),
         EVENT_YEAR = year(EVENT_DATE),
         EVENT_MONTH = month(EVENT_DATE),
         SEASON = if_else(EVENT_MONTH %in% 4:9, "Summer", "Winter"))

counts_clean <- counts_clean %>%
  select(
    SITE_ID,
    SITE_NAME = SITE_NAME.x,
    SITE_PARENT_NAME,
    EVENT_DATE,
    SPECIES_NAME,
    LATIN_NAME,
    FISH_COUNT,
    TOP_TIER_SITE,
    SITE_RANKED_NGR,
    SITE_RANKED_EASTING,
    SITE_RANKED_NORTHING)

# Convert dates and extract components
counts_clean <- counts_clean %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_DATE_YEAR = year(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_DAY = day(EVENT_DATE))

# Sampling method
counts_clean <- counts_clean %>%
  mutate(
    METHOD = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Thames only
counts_thames <- counts_clean %>%
  filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))

# Lat and long
sites_sf <- st_as_sf(counts_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

# Gears
sites_sf <- sites_sf %>%
  mutate(
    GEAR_TYPE = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)

leaflet(sites_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~gear_pal(GEAR_TYPE),
    radius = 6,
    stroke = TRUE, weight = 1, fillOpacity = 0.9,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
  addLegend(
    position = "bottomright",
    pal = gear_pal,
    values = ~GEAR_TYPE,
    title = "Gear Type",
    opacity = 1) %>%
  addLayersControl(
    overlayGroups = gear_types,
    options = layersControlOptions(collapsed = FALSE),
    position = "topright")
# RA
gear_totals <- counts_thames %>%
  filter(!is.na(METHOD)) %>%
  group_by(METHOD) %>%
  summarise(
    Total_Individuals = sum(FISH_COUNT, na.rm = TRUE),
    Total_Species = n_distinct(SPECIES_NAME),
    .groups = "drop")

# Get grand total
grand_total <- sum(gear_totals$Total_Individuals)

# Add relative abundance column
gear_totals <- gear_totals %>%
  mutate(
    Relative_Abundance = round(100 * Total_Individuals / grand_total, 1)
  ) %>%
  arrange(desc(Relative_Abundance)) %>%
  mutate(METHOD = factor(METHOD, levels = unique(METHOD)))

ggplot(gear_totals, aes(x = reorder(METHOD, -Relative_Abundance), y = Relative_Abundance)) +
  geom_col(fill = "tomato") +
  labs(
    title = "Relative abundance by sampling method",
    x = "Sampling method",
    y = "Relative abundance (%)") +
  theme_minimal()

# Matrix
gear_species_table <- counts_thames %>%
  filter(!is.na(METHOD), !is.na(SPECIES_NAME)) %>%
  count(METHOD, SPECIES_NAME) %>%
  pivot_wider(names_from = METHOD, values_from = n, values_fill = 0)

gear_pa <- gear_species_table %>%
  mutate(across(-SPECIES_NAME, ~ as.integer(. > 0)))

gear_cols <- names(gear_pa)[!names(gear_pa) %in% "SPECIES_NAME"]

gear_unique <- gear_pa %>%
  rowwise() %>%
  mutate(unique_to = if (sum(c_across(all_of(gear_cols))) == 1) {
    gear_cols[which(c_across(all_of(gear_cols)) == 1)]
  } else {
    NA_character_
  }) %>%
  ungroup() %>%
  filter(!is.na(unique_to))

unique_counts <- gear_unique %>%
  count(unique_to, name = "unique_species_count") %>%
  arrange(desc(unique_species_count))

ggplot(unique_counts, aes(x = reorder(unique_to, unique_species_count), y = unique_species_count, fill = unique_to)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Number of species unique to each gear type",
    x = "Sampling method", y = "Unique species count") +
  theme_minimal()

gear_pa_long <- gear_pa %>%
  pivot_longer(cols = -SPECIES_NAME, names_to = "METHOD", values_to = "present")

ggplot(gear_pa_long, aes(x = METHOD, y = SPECIES_NAME, fill = as.factor(present))) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("0" = "white", "1" = "steelblue")) +
  labs(
    title = "Species presence across sampling method",
    x = "Sampling method", y = "Species",
    fill = "Present") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

Focusing on seine net data only

# Seine net only
counts_thames_filtered <- counts_thames %>%
  filter(METHOD == "Seine Net")

How is fish alpha diversity changing over time across the estuary?

# Estuary
diversity_summary <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
    Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape
diversity_long <- diversity_summary %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Metric", values_to = "Value")

# Raw trends over time
ggplot(diversity_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(color = "red", linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, color = "black", linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Diversity metrics over time - Thames Estuary (seine net)",
    subtitle = "LOESS smoothed trends across all years",
    x = "Year", y = "Diversity metric") +
  theme_minimal()

# GLMs/LMMs for each metric
glm_richness   <- glm(Species_Richness ~ EVENT_DATE_YEAR, data = diversity_summary, family = poisson())
lm_abundance   <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = diversity_summary)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
results <- bind_rows(
  summary_table(glm_richness,   "Species Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(lm_shannon,     "Shannon Diversity"),
  summary_table(lm_simpson,     "Simpson Diversity"),
  summary_table(lm_margalef,    "Margalef Richness"),
  summary_table(lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in diversity metrics over time - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

How is fish alpha diversity changing over time across within each zone?

# Zone
zone_summary <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Wide community matrix for each zone-year
community_matrix <- zone_summary %>%
  pivot_wider(names_from = SPECIES_NAME, values_from = FISH_COUNT, values_fill = 0)

# Diversity metrics
diversity_zone_year <- community_matrix %>%
  rowwise() %>%
  mutate(
    Species_Richness = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)) > 0),
    Total_Abundance = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE))),
    Shannon_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "shannon"),
    Simpson_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "simpson")) %>%
  ungroup() %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance + 1),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness + 1),
    EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

# Reshape
diversity_long_zone <- diversity_zone_year %>%
  pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
                        Margalef_Richness, Pielou_Evenness, Total_Abundance),
               names_to = "Metric", values_to = "Value")

# Model
zone_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Value ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

zone_results <- diversity_long_zone %>%
  group_by(TOP_TIER_SITE, Metric) %>%
  group_modify(~ zone_model_summary(.x)) %>%
  ungroup() 

# Plot
ggplot(zone_results %>% filter(!is.na(Slope)),
       aes(x = Slope, y = TOP_TIER_SITE, color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3, color = "gray40") +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in diversity metrics over time by zone - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per zone & metric",
    x = "Slope (Change per year)",
    y = "Zone",
    color = "Significance",
    caption = "* p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal() +
  theme(strip.text = element_text(face = "bold"))

Where are the hotspots of ecological change?

# Site
diversity_site_year <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
    Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
    EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

# Reshape
diversity_long_site <- diversity_site_year %>%
  pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
                        Margalef_Richness, Pielou_Evenness, Total_Abundance),
               names_to = "Metric", values_to = "Value")

# Model
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply per group
site_results <- diversity_long_site %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

site_results_clean <- site_results %>%
  filter(!is.na(Slope))

# Plot
ggplot(site_results_clean, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in diversity metrics over time by site - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Site ranking
site_ranked_sig <- site_results_clean %>%
  filter(Significance != "") %>%  # Keep only significant trends
  mutate(
    Direction = ifelse(Slope > 0, "Increasing", "Decreasing"),
    Strength = abs(Slope)) %>%
  group_by(Metric) %>%
  arrange(desc(P_value)) %>%
  mutate(Rank = row_number()) %>%
  ungroup()

site_ranked_sig %>%
  arrange(Metric, Rank) %>%
  select(Metric, SITE_PARENT_NAME, Slope, Direction, P_value, Significance) %>%
  gt(groupname_col = "Metric") %>%
  fmt_number(columns = c(Slope, P_value), decimals = 3) %>%
  tab_header(
    title = "Sites with significant trends per diversity metric (seine net)",
    subtitle = "Ranked by magnitude of change") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    Slope = "Slope",
    Direction = "Trend",
    P_value = "p-value",
    Significance = "Significance")
Sites with significant trends per diversity metric (seine net)
Ranked by magnitude of change
Site Slope Trend p-value Significance
Pielou_Evenness
Richmond −0.001 Decreasing 0.029 *
Greenwich −0.001 Decreasing 0.022 *
Shannon_Diversity
Mucking −0.058 Decreasing 0.002 **
Simpson_Diversity
West Thurrock −0.002 Decreasing 0.036 *
Species_Richness
Greenhithe 3.000 Increasing 0.000 ***
Total_Abundance
Greenwich −6.276 Decreasing 0.038 *
Richmond 23.970 Increasing 0.022 *

Which species are changing in abundance over time?

# Estuary
species_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Fit linear models for each species
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply model per species
species_results <- species_trends %>%
  group_by(SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

# Filter significant results
species_results_clean <- species_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  labs(
    title = "Species-level trends in abundance over time in the Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per species",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

#Zone
species_zone_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

#Model
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

species_zone_results <- species_zone_trends %>%
  group_by(TOP_TIER_SITE, SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

species_zone_results_clean <- species_zone_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_zone_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_grid(~ TOP_TIER_SITE, scales = "free_y") +
  labs(
    title = "Species-level abundance trends by estuary zone (seine net)",
    subtitle = "Slopes from per-zone linear models ± 95% CI",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    color = "Significance",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal(base_size = 12) +
  theme(strip.text = element_text(face = "bold", size = 10))

# Site
species_site_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Model
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

species_site_results <- species_site_trends %>%
  group_by(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

species_site_results_clean <- species_site_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_site_results_clean,
       aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", nrow = 5) +
  labs(
    title = "Species-level abundance trends by site (seine net)",
    subtitle = "Slopes from per-site linear models ± 95% CI",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    color = "Significance",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal(base_size = 12) +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 6))

What are the patterns in temporal beta diversity?

# Filter
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT))

# Presence/absence
fish_clean <- fish_clean %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

# Summarise
year_species_pa <- fish_clean %>%
  group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
  summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
  pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0) %>%
  column_to_rownames("EVENT_DATE_YEAR")

# Matrix
year_species_pa <- as.data.frame(year_species_pa)

# betapart core object
beta_core <- betapart.core(year_species_pa)

# Sørensen-based beta diversity components
beta_res <- beta.pair(beta_core, index.family = "sor")

# Extract matrices
sor_mat <- as.matrix(beta_res$beta.sor)
sim_mat <- as.matrix(beta_res$beta.sim)
sne_mat <- as.matrix(beta_res$beta.sne)

# Extract years in chronological order
years <- sort(as.numeric(rownames(sor_mat)))

# Create a data frame
beta_trends <- tibble(
  Year1 = years[-length(years)],
  Year2 = years[-1],
  beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
  beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
  beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)]))

# Linear models by component
lm_sor <- lm(beta_sor ~ Year2, data = beta_trends)
lm_sim <- lm(beta_sim ~ Year2, data = beta_trends)
lm_sne <- lm(beta_sne ~ Year2, data = beta_trends)

# Extract R² and p-values
get_lm_stats <- function(model) {
  summary_model <- summary(model)
  r2 <- round(summary_model$r.squared, 3)
  p <- round(summary_model$coefficients[2, 4], 3)
  return(list(r2 = r2, p = p))
}

sor_stats <- get_lm_stats(lm_sor)
sim_stats <- get_lm_stats(lm_sim)
sne_stats <- get_lm_stats(lm_sne)

beta_trends_long <- beta_trends %>%
  pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
               names_to = "Component",
               values_to = "Dissimilarity")

beta_trends_long <- beta_trends_long %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))


# R² and p-values
subtitle_text <- paste0(
  "Sorensen: R² = ", sor_stats$r2, ", p = ", sor_stats$p, "   |   ",
  "Turnover: R² = ", sim_stats$r2, ", p = ", sim_stats$p, "   |   ",
  "Nestedness: R² = ", sne_stats$r2, ", p = ", sne_stats$p)

# Plot
ggplot(beta_trends_long, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Sorensen" = "darkorchid", 
                                "Turnover" = "steelblue", 
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year change in beta diversity components in the Thames Estuary (seine net)",
    subtitle = subtitle_text,
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal()

# Zone
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

zone_list <- unique(fish_clean$TOP_TIER_SITE)
all_beta_trends <- list()

# Model
for (zone in zone_list) {
  cat("Processing zone:", zone, "\n")
  
  zone_data <- fish_clean %>%
    filter(TOP_TIER_SITE == zone)
  
  zone_pa <- zone_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  if (nrow(zone_pa) >= 3) {
    zone_matrix <- zone_pa %>%
      column_to_rownames("EVENT_DATE_YEAR") %>%
      as.data.frame()
    
    if (!all(rowSums(zone_matrix) == 0)) {
      beta_core <- betapart.core(zone_matrix)
      beta_res <- beta.pair(beta_core, index.family = "sor")
      
      sor_mat <- as.matrix(beta_res$beta.sor)
      sim_mat <- as.matrix(beta_res$beta.sim)
      sne_mat <- as.matrix(beta_res$beta.sne)
      years <- sort(as.numeric(rownames(sor_mat)))
      
      # Get year-to-year dissimilarities
      beta_trends <- tibble(
        Year1 = years[-length(years)],
        Year2 = years[-1],
        beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
        beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
        beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
      ) %>%
        pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
                     names_to = "Component", values_to = "Dissimilarity") %>%
        mutate(TOP_TIER_SITE = zone)
      
      all_beta_trends[[zone]] <- beta_trends
    }
  }
}

# Combine results
beta_trends_all <- bind_rows(all_beta_trends)

# Clean component labels
beta_trends_all <- beta_trends_all %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))

# R² and p-values
lm_stats_list <- beta_trends_all %>%
  group_by(TOP_TIER_SITE, Component) %>%
  summarise(
    lm_model = list(lm(Dissimilarity ~ Year2)),
    .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))) %>%
  select(TOP_TIER_SITE, Component, r2, p)


facet_grid(rows = vars(TOP_TIER_SITE))

beta_trends_all <- beta_trends_all %>%
  mutate(
    TOP_TIER_ZONE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower")),
  )

# Subtitle
zone_order <- c("Thames Upper", "Thames Middle", "Thames Lower")

subtitle_text <- lm_stats_list %>%
  mutate(
    label = paste0(
      "<span style='color:",
      case_when(Component == "Sorensen" ~ "darkorchid",
                Component == "Turnover" ~ "steelblue",
                Component == "Nestedness" ~ "darkorange"),
      "'>", Component, "</span>: R² = ", r2, ", p = ", p)) %>%
  group_by(TOP_TIER_SITE) %>%
  summarise(text = paste(label, collapse = " &nbsp;&nbsp;|&nbsp;&nbsp; "), .groups = "drop") %>%
  mutate(TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = zone_order)) %>%
  arrange(TOP_TIER_SITE) %>%
  summarise(subtitle = paste(text, collapse = "<br><br>")) %>%
  pull(subtitle)

# Plot
ggplot(beta_trends_all, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 1.5) +
  facet_grid(rows = vars(TOP_TIER_ZONE), scales = "free_y") +
  scale_color_manual(values = c("Sorensen" = "darkorchid",
                                "Turnover" = "steelblue",
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year beta diversity trends by zone (seine net)",
    subtitle = subtitle_text,
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal() +
  theme(
    plot.subtitle = ggtext::element_markdown(size = 10),
    strip.text = element_text(face = "bold", size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1))

# Site
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

site_list <- unique(fish_clean$SITE_PARENT_NAME)

site_beta_trends <- list()

# Model
for (site in site_list) {
  cat("Processing site:", site, "\n")
  
  site_data <- fish_clean %>%
    filter(SITE_PARENT_NAME == site)
  
  site_pa <- site_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  if (nrow(site_pa) >= 3) {
    site_matrix <- site_pa %>%
      column_to_rownames("EVENT_DATE_YEAR") %>%
      as.data.frame()
    
    if (!all(rowSums(site_matrix) == 0)) {
      beta_core <- betapart.core(site_matrix)
      beta_res <- beta.pair(beta_core, index.family = "sor")
      
      sor_mat <- as.matrix(beta_res$beta.sor)
      sim_mat <- as.matrix(beta_res$beta.sim)
      sne_mat <- as.matrix(beta_res$beta.sne)
      years <- sort(as.numeric(rownames(sor_mat)))
      
      beta_trends <- tibble(
        Year1 = years[-length(years)],
        Year2 = years[-1],
        beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
        beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
        beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
      ) %>%
        pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
                     names_to = "Component", values_to = "Dissimilarity") %>%
        mutate(SITE_PARENT_NAME = site)
      
      site_beta_trends[[site]] <- beta_trends
    }
  }
}

beta_trends_site <- bind_rows(site_beta_trends) %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))
# R² and p-values
lm_stats_list <- beta_trends_site %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  summarise(lm_model = list(lm(Dissimilarity ~ Year2)), .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))
  ) %>%
  select(SITE_PARENT_NAME, Component, r2, p)

ggplot(beta_trends_site, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 1.5) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", ncol = 4) +
  scale_color_manual(values = c("Sorensen" = "darkorchid",
                                "Turnover" = "steelblue",
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year beta diversity trends by site (seine net)",
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom")

# Rank and table
lm_stats_list_site <- beta_trends_site %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  summarise(
    lm_model = list(lm(Dissimilarity ~ Year2)),
    .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    slope = map_dbl(summary, ~ coef(.x)["Year2", "Estimate"]),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(coef(.x)["Year2", "Pr(>|t|)"], 3)),
    Significant = p <= 0.05,
    Direction = case_when(
      Significant & slope > 0 ~ "Increasing",
      Significant & slope < 0 ~ "Decreasing",
      TRUE ~ "Stable")) %>%
  select(SITE_PARENT_NAME, Component, slope, r2, p, Significant, Direction)

beta_trend_summary_site <- lm_stats_list_site %>%
  arrange(Component, Direction, SITE_PARENT_NAME)

beta_trend_summary_site_sig <- beta_trend_summary_site %>%
  filter(Significant == TRUE)

beta_trend_summary_site_sig %>%
  gt(groupname_col = "Component") %>%
  fmt_number(columns = c(slope, r2, p), decimals = 3) %>%
  tab_header(
    title = "Significant beta diversity trends by site (seine net)",
    subtitle = "Only sites with p ≤ 0.05 for Sorensen, Turnover, or Nestedness components") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    slope = "Slope",
    r2 = "R²",
    p = "p-value",
    Direction = "Trend") %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    row.striping.include_table_body = TRUE)
# Clean presence-absence matrix across years
pa_matrix_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME)) %>%
  mutate(present = if_else(FISH_COUNT > 0, 1, 0)) %>%
  group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
  summarise(present = max(present), .groups = "drop") %>%
  pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)

# Set rownames
rownames(pa_matrix_clean) <- pa_matrix_clean$EVENT_DATE_YEAR
pa_matrix_clean <- pa_matrix_clean[, -1]

# Initialize species turnover lists
site_names <- rownames(pa_matrix_clean)
species_lost_list <- list()
species_gained_list <- list()

for (i in 1:(length(site_names) - 1)) {
  for (j in (i + 1):length(site_names)) {
    site1 <- pa_matrix_clean[site_names[i], ]
    site2 <- pa_matrix_clean[site_names[j], ]
    
    site1[is.na(site1)] <- 0
    site2[is.na(site2)] <- 0
    
    lost <- names(site1)[site1 == 1 & site2 == 0]
    gained <- names(site2)[site2 == 1 & site1 == 0]
    
    species_lost_list <- c(species_lost_list, lost)
    species_gained_list <- c(species_gained_list, gained)
  }
}

# Create frequency tables
lost_summary <- sort(table(unlist(species_lost_list)), decreasing = TRUE)
gained_summary <- sort(table(unlist(species_gained_list)), decreasing = TRUE)

# Create data frames
lost_df <- as.data.frame(lost_summary[1:50])
gained_df <- as.data.frame(gained_summary[1:50])
names(lost_df) <- c("species", "count")
names(gained_df) <- c("species", "count")

# Flip lost counts negative for bidirectional plotting
lost_df$count <- -lost_df$count
lost_df$type <- "Lost"
gained_df$type <- "Gained"

# Combine and reorder
combined_df <- bind_rows(lost_df, gained_df)
combined_df$species <- fct_reorder(combined_df$species, abs(combined_df$count))

# Prepare model data
model_df <- combined_df %>%
  mutate(
    abs_count = abs(count),
    type = factor(type))

glm_species <- glm(abs_count ~ species + type, family = quasipoisson, data = model_df)

tidy_glm <- tidy(glm_species)

sig_species <- tidy_glm %>%
  filter(grepl("^species", term)) %>%
  filter(p.value < 0.05) %>%
  mutate(species = gsub("^species", "", term))

plot_df <- model_df %>%
  filter(species %in% sig_species$species) %>%
  mutate(
    direction_count = if_else(type == "Lost", -abs_count, abs_count))

ggplot(plot_df, aes(x = direction_count, y = fct_reorder(species, abs(direction_count)), fill = type)) +
  geom_col() +
  scale_fill_manual(values = c("Lost" = "gray60", "Gained" = "darkgreen")) +
  scale_x_continuous(labels = abs) +
  labs(
    title = "Significant fish species turnover in the Thames Estuary",
    x = "Turnover Frequency (← Lost | Gained →)",
    y = NULL,
    fill = "Type") +
  theme_minimal(base_size = 13)

# Get unique zones
zones <- unique(counts_thames_filtered$TOP_TIER_SITE)

# Store all zone plots
zone_plots <- list()

for (zone in zones) {
  message("Processing zone: ", zone)
  
  # Filter data for zone
  zone_data <- counts_thames_filtered %>%
    filter(TOP_TIER_SITE == zone, !is.na(SPECIES_NAME)) %>%
    mutate(present = if_else(FISH_COUNT > 0, 1, 0))
  
  # Create presence-absence matrix by year
  pa_matrix_clean <- zone_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = max(present), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  # Skip zones with fewer than 2 years of data
  if (nrow(pa_matrix_clean) < 2) next
  
  rownames(pa_matrix_clean) <- pa_matrix_clean$EVENT_DATE_YEAR
  pa_matrix_clean <- pa_matrix_clean[, -1]
  
  # Initialize species turnover lists
  site_names <- rownames(pa_matrix_clean)
  species_lost_list <- list()
  species_gained_list <- list()
  
  for (i in 1:(length(site_names) - 1)) {
    for (j in (i + 1):length(site_names)) {
      site1 <- pa_matrix_clean[site_names[i], ]
      site2 <- pa_matrix_clean[site_names[j], ]
      site1[is.na(site1)] <- 0
      site2[is.na(site2)] <- 0
      
      lost <- names(site1)[site1 == 1 & site2 == 0]
      gained <- names(site2)[site2 == 1 & site1 == 0]
      
      species_lost_list <- c(species_lost_list, lost)
      species_gained_list <- c(species_gained_list, gained)
    }
  }
  
  # Summarize turnover frequency
  lost_summary <- sort(table(unlist(species_lost_list)), decreasing = TRUE)
  gained_summary <- sort(table(unlist(species_gained_list)), decreasing = TRUE)
  
  if (length(lost_summary) == 0 & length(gained_summary) == 0) next
  
  lost_df <- as.data.frame(lost_summary)
  gained_df <- as.data.frame(gained_summary)
  names(lost_df) <- c("species", "count")
  names(gained_df) <- c("species", "count")
  
  lost_df$count <- -lost_df$count
  lost_df$type <- "Lost"
  gained_df$type <- "Gained"
  
  combined_df <- bind_rows(lost_df, gained_df)
  combined_df$species <- fct_reorder(combined_df$species, abs(combined_df$count))
  
  model_df <- combined_df %>%
    mutate(abs_count = abs(count), type = factor(type))
  
  glm_species <- glm(abs_count ~ species + type, family = quasipoisson, data = model_df)
  tidy_glm <- tidy(glm_species)
  
  sig_species <- tidy_glm %>%
    filter(grepl("^species", term)) %>%
    filter(p.value < 0.05) %>%
    mutate(species = gsub("^species", "", term))
  
  plot_df <- model_df %>%
    filter(species %in% sig_species$species) %>%
    mutate(direction_count = if_else(type == "Lost", -abs_count, abs_count))
  
  # Skip if no significant species
  if (nrow(plot_df) == 0) next
  
  # Plot for this zone
  p <- ggplot(plot_df, aes(x = direction_count, y = fct_reorder(species, abs(direction_count)), fill = type)) +
    geom_col() +
    scale_fill_manual(values = c("Lost" = "gray60", "Gained" = "darkgreen")) +
    scale_x_continuous(labels = abs) +
    labs(
      title = paste("Significant fish species turnover in the", zone, "Zone"),
      x = "Turnover Frequency (← Lost | Gained →)",
      y = NULL,
      fill = "Type"
    ) +
    theme_minimal(base_size = 13)
  
  zone_plots[[zone]] <- p
}

# Print all zone plots
zone_plots
## $`Thames Upper`

## 
## $`Thames Middle`

# Get unique sites
sites <- unique(counts_thames_filtered$SITE_PARENT_NAME)

# Store plots
site_plots <- list()

for (site in sites) {
  message("Processing site: ", site)
  
  # Filter for site and clean
  site_data <- counts_thames_filtered %>%
    filter(SITE_PARENT_NAME == site, !is.na(SPECIES_NAME)) %>%
    mutate(present = if_else(FISH_COUNT > 0, 1, 0))
  
  # Create presence-absence matrix by year
  pa_matrix_clean <- site_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = max(present), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  # Skip if only 1 year
  if (nrow(pa_matrix_clean) < 2) next
  
  rownames(pa_matrix_clean) <- pa_matrix_clean$EVENT_DATE_YEAR
  pa_matrix_clean <- pa_matrix_clean[, -1]
  
  # Initialize turnover lists
  site_years <- rownames(pa_matrix_clean)
  species_lost_list <- list()
  species_gained_list <- list()
  
  for (i in 1:(length(site_years) - 1)) {
    for (j in (i + 1):length(site_years)) {
      site1 <- pa_matrix_clean[site_years[i], ]
      site2 <- pa_matrix_clean[site_years[j], ]
      site1[is.na(site1)] <- 0
      site2[is.na(site2)] <- 0
      
      lost <- names(site1)[site1 == 1 & site2 == 0]
      gained <- names(site2)[site2 == 1 & site1 == 0]
      
      species_lost_list <- c(species_lost_list, lost)
      species_gained_list <- c(species_gained_list, gained)
    }
  }
  
  # Skip if no turnover
  if (length(species_lost_list) == 0 && length(species_gained_list) == 0) next
  
  # Summarize and prepare for model
  lost_df <- as.data.frame(table(unlist(species_lost_list)))
  gained_df <- as.data.frame(table(unlist(species_gained_list)))
  names(lost_df) <- c("species", "count")
  names(gained_df) <- c("species", "count")
  lost_df$count <- -lost_df$count
  lost_df$type <- "Lost"
  gained_df$type <- "Gained"
  
  combined_df <- bind_rows(lost_df, gained_df)
  
  # Create model input
  model_df <- combined_df %>%
    mutate(abs_count = abs(count),
           type = factor(type))
  
  # Fit model
  glm_species <- glm(abs_count ~ species + type, family = quasipoisson, data = model_df)
  tidy_glm <- tidy(glm_species)
  
  sig_species <- tidy_glm %>%
    filter(grepl("^species", term)) %>%
    filter(p.value < 0.05) %>%
    mutate(species = gsub("^species", "", term))
  
  if (nrow(sig_species) == 0) {
    message("→ No significant species at ", site)
    next
  }
  
  plot_df <- model_df %>%
    filter(species %in% sig_species$species) %>%
    mutate(
      direction_count = if_else(type == "Lost", -abs_count, abs_count),
      species = fct_reorder(species, abs(direction_count))
    )
  
  # Plot
  p <- ggplot(plot_df, aes(x = direction_count, y = species, fill = type)) +
    geom_col() +
    scale_fill_manual(values = c("Lost" = "gray60", "Gained" = "darkgreen")) +
    scale_x_continuous(labels = abs) +
    labs(
      title = paste("Significant fish species turnover at", site),
      x = "Turnover Frequency (← Lost | Gained →)",
      y = NULL,
      fill = "Type"
    ) +
    theme_minimal(base_size = 13)
  
  site_plots[[site]] <- p
}

site_plots
## $Battersea

## 
## $Greenwich

## 
## $Kew

## 
## $Richmond

## 
## $`Teddington Weir`

## 
## $Mucking

Length data

Data wrangling

EA sampling sites

# Load data
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")

lengths <- read_csv("TraC_Fish_Individual_Lengths_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")
lengths_clean <- lengths %>% left_join(sites, by = "SITE_ID")

lengths_clean <- lengths_clean %>%
  select(
    SITE_ID,
    SITE_NAME = SITE_NAME.x,
    SITE_PARENT_NAME,
    EVENT_DATE,
    SPECIES_NAME,
    LATIN_NAME,
    FISH_LENGTH,
    TOP_TIER_SITE,
    SITE_RANKED_NGR,
    SITE_RANKED_EASTING,
    SITE_RANKED_NORTHING)

# Convert dates and extract components
lengths_clean <- lengths_clean %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_DAY = day(EVENT_DATE))

# Sampling method
lengths_clean <- lengths_clean %>%
  mutate(
    METHOD = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Thames only
lengths_thames <- lengths_clean %>%
  filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))

# Lat and long
sites_sf <- st_as_sf(lengths_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

# Gears
sites_sf <- sites_sf %>%
  mutate(
    GEAR_TYPE = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)

leaflet(sites_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~gear_pal(GEAR_TYPE),
    radius = 6,
    stroke = TRUE, weight = 1, fillOpacity = 0.9,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
  addLegend(
    position = "bottomright",
    pal = gear_pal,
    values = ~GEAR_TYPE,
    title = "Gear Type",
    opacity = 1) %>%
  addLayersControl(
    overlayGroups = gear_types,
    options = layersControlOptions(collapsed = FALSE),
    position = "topright")
# Sampling method summary
lengths_gear <- lengths_thames %>%
  mutate(
    EVENT_YEAR = lubridate::year(lubridate::ymd(EVENT_DATE))) %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(is_juvenile = FISH_LENGTH <= juvenile_threshold)

gear_summary <- lengths_gear %>%
  group_by(METHOD) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    prop_juvenile = mean(is_juvenile, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop")

gear_summary <- gear_summary %>%
  arrange(desc(prop_juvenile)) %>%
  mutate(METHOD = factor(METHOD, levels = unique(METHOD)))

glm_model <- glm(is_juvenile ~ METHOD, data = lengths_gear, family = "binomial")

p_vals <- tidy(glm_model) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    gear = gsub("METHOD", "", term),
    label = paste0("p = ", signif(p.value, 2)))

ggplot(gear_summary, aes(x = METHOD, y = prop_juvenile)) +
  geom_col(fill = "tomato") +
  labs(
    title = "Proportion of juveniles by sampling method",
    x = "Sampling method", y = "Proportion juvenile") +
  ylim(0, 1) +
  theme_minimal()

# Species per gear
gear_summary <- lengths_thames %>%
  group_by(METHOD) %>%
  summarise(
    species_richness = n_distinct(SPECIES_NAME),
    total_fish = n(),
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    juvenile_proportion = mean(FISH_LENGTH <= quantile(FISH_LENGTH, 0.25, na.rm = TRUE), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(species_richness))

print(gear_summary)

gear_species_table <- lengths_thames %>%
  count(METHOD, SPECIES_NAME) %>%
  pivot_wider(names_from = METHOD, values_from = n, values_fill = 0)

gear_pa <- gear_species_table %>%
  mutate(across(-SPECIES_NAME, ~ as.integer(. > 0)))

gear_cols <- names(gear_pa)[!names(gear_pa) %in% "SPECIES_NAME"]

gear_unique <- gear_pa %>%
  rowwise() %>%
  mutate(unique_to = if (sum(c_across(all_of(gear_cols))) == 1) {
    gear_cols[which(c_across(all_of(gear_cols)) == 1)]
  } else {
    NA_character_
  }) %>%
  ungroup() %>%
  filter(!is.na(unique_to))

unique_counts <- gear_unique %>%
  count(unique_to, name = "unique_species_count") %>%
  arrange(desc(unique_species_count))

ggplot(unique_counts, aes(x = reorder(unique_to, unique_species_count), y = unique_species_count, fill = unique_to)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Number of species unique to each gear type",
    x = "Gear type", y = "Unique species count") +
  theme_minimal()

gear_unique %>%
  arrange(unique_to, SPECIES_NAME)

gear_pa_long <- gear_pa %>%
  pivot_longer(cols = -SPECIES_NAME, names_to = "METHOD", values_to = "present")

ggplot(gear_pa_long, aes(x = METHOD, y = SPECIES_NAME, fill = as.factor(present))) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("0" = "white", "1" = "steelblue")) +
  labs(
    title = "Species presence across gear types",
    x = "Gear type", y = "Species",
    fill = "Present") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 7))

Focusing on seine net sampling only

lengths_thames_filtered <- lengths_thames %>%
  filter(METHOD %in% c("Seine Net", "Kick Net"))

# Check for NAs or 0-length values
lengths_thames_filtered %>%
  filter(is.na(FISH_LENGTH) | FISH_LENGTH == 0)
# Boxplot for outliers
ggplot(lengths_thames_filtered, aes(x = METHOD, y = FISH_LENGTH)) +
  geom_boxplot(outlier.colour = "red", outlier.shape = 16, outlier.size = 2) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Boxplot of fish lengths by gear type",
    x = "Gear type",
    y = "Fish length (mm)")

ggplot(lengths_thames_filtered, aes(x = TOP_TIER_SITE, y = FISH_LENGTH)) +
  geom_boxplot(outlier.colour = "red") +
  theme_minimal() +
  labs(title = "Fish lengths by estuary zone (seine net)")

# Remove outliers
Q1 <- quantile(lengths_thames_filtered$FISH_LENGTH, 0.25, na.rm = TRUE)
Q3 <- quantile(lengths_thames_filtered$FISH_LENGTH, 0.75, na.rm = TRUE)
IQR_value <- Q3 - Q1

lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

lengths_thames_no_outliers <- lengths_thames_filtered %>%
  filter(FISH_LENGTH >= lower_bound, FISH_LENGTH <= upper_bound)

lengths_thames_filtered <- lengths_thames_no_outliers

ggplot(lengths_thames_filtered, aes(x = FISH_LENGTH)) +
  geom_histogram(bins = 100, fill = "darkorange") +
  labs(title = "Distribution of fish lengths (seine net)", x = "Fish length (mm)", y = "Frequency") +
  theme_minimal()

Are fish getting larger in the Thames Estuary?

#Estuary
length_stats_year <- lengths_thames_filtered %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n()) %>%
  arrange(EVENT_YEAR)

length_stats_species_year <- lengths_thames %>%
  group_by(EVENT_YEAR, SPECIES_NAME) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n()) %>%
  ungroup() %>%
  arrange(EVENT_YEAR, SPECIES_NAME)

# Model
mean_lm <- lm(mean_length ~ EVENT_YEAR, data = length_stats_year)
mean_slope <- coef(summary(mean_lm))["EVENT_YEAR", "Estimate"]
mean_p <- coef(summary(mean_lm))["EVENT_YEAR", "Pr(>|t|)"]

# Model
median_lm <- lm(median_length ~ EVENT_YEAR, data = length_stats_year)
median_slope <- coef(summary(median_lm))["EVENT_YEAR", "Estimate"]
median_p <- coef(summary(median_lm))["EVENT_YEAR", "Pr(>|t|)"]

# Plot
ggplot(length_stats_year, aes(x = EVENT_YEAR)) +
  geom_line(aes(y = mean_length, color = "Mean", linetype = "Mean"), size = 1) +
  geom_point(aes(y = mean_length, color = "Mean")) +
  geom_smooth(aes(y = mean_length, color = "Mean", linetype = "Mean"),
              method = "lm", se = FALSE) +
  
  geom_line(aes(y = median_length, color = "Median", linetype = "Median"), size = 1) +
  geom_point(aes(y = median_length, color = "Median")) +
  geom_smooth(aes(y = median_length, color = "Median", linetype = "Median"),
              method = "lm", se = FALSE) +
  
  scale_color_manual(values = c("Mean" = "blue", "Median" = "darkorange")) +
  scale_linetype_manual(values = c("Mean" = "solid", "Median" = "dashed")) +
  
  labs(
    title = "Mean and median fish length in the Thames Estuary (seine net)",
    subtitle = paste0(
      "Mean slope = ", round(mean_slope, 2), " mm/year (p = ", signif(mean_p, 3), "); ",
      "Median slope = ", round(median_slope, 2), " mm/year (p = ", signif(median_p, 3), ")"),
    x = "Year",
    y = "Fish length (mm)",
    color = "Statistic",
    linetype = "Statistic") +
  theme_minimal() +
  theme(legend.position = "top")

# Zone
length_stats_tier <- lengths_thames_filtered %>%
  group_by(EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop") %>%
  arrange(EVENT_YEAR, TOP_TIER_SITE)

length_labels <- length_stats_tier %>%
  nest(data = -TOP_TIER_SITE) %>%
  mutate(
    model_mean = map(data, ~ lm(mean_length ~ EVENT_YEAR, data = .x)),
    model_median = map(data, ~ lm(median_length ~ EVENT_YEAR, data = .x)),
    stats_mean = map(model_mean, tidy),
    stats_median = map(model_median, tidy)) %>%
  transmute(
    TOP_TIER_SITE,
    label = pmap(list(TOP_TIER_SITE, stats_mean, stats_median), function(zone, m, md) {
      slope_mean <- m[m$term == "EVENT_YEAR", "estimate"]
      p_mean <- m[m$term == "EVENT_YEAR", "p.value"]
      slope_median <- md[md$term == "EVENT_YEAR", "estimate"]
      p_median <- md[md$term == "EVENT_YEAR", "p.value"]
      paste0(
        zone, "\n",
        "Mean: ", round(slope_mean, 2), " mm/year (p=", signif(p_mean, 2), ")\n",
        "Median: ", round(slope_median, 2), " mm/year (p=", signif(p_median, 2), ")")})) %>%
  unnest(label)

length_stats_tier$TOP_TIER_SITE <- factor(length_stats_tier$TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower"))

ordered_levels <- c("Thames Upper", "Thames Middle", "Thames Lower")
length_labels$TOP_TIER_SITE <- factor(length_labels$TOP_TIER_SITE, levels = ordered_levels)
length_stats_tier$TOP_TIER_SITE <- factor(length_stats_tier$TOP_TIER_SITE, levels = ordered_levels)

# Plot
ggplot(length_stats_tier, aes(x = EVENT_YEAR)) +
  geom_line(aes(y = mean_length, color = "Mean", linetype = "Mean"), size = 1) +
  geom_point(aes(y = mean_length, color = "Mean")) +
  geom_smooth(aes(y = mean_length, color = "Mean", linetype = "Mean"), method = "lm", se = FALSE) +
  
  geom_line(aes(y = median_length, color = "Median", linetype = "Median"), size = 1) +
  geom_point(aes(y = median_length, color = "Median")) +
  geom_smooth(aes(y = median_length, color = "Median", linetype = "Median"), method = "lm", se = FALSE) +
  
  scale_color_manual(values = c("Mean" = "blue", "Median" = "darkorange")) +
  scale_linetype_manual(values = c("Mean" = "solid", "Median" = "dashed")) +
  
  facet_wrap(~TOP_TIER_SITE, labeller = as_labeller(deframe(length_labels)), scales = "free_y") +
  
  labs(
    title = "Mean and median fish length by estuary zone (seine net)",
    x = "Year",
    y = "Fish length (mm)",
    color = "Statistic",
    linetype = "Statistic") +
  theme_minimal() +
  theme(legend.position = "top")

Are there any changes in juvenile arrival?

lengths_juveniles <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    is_juvenile = FISH_LENGTH <= juvenile_threshold) %>%
  ungroup()

first_juvenile_estuary <- lengths_juveniles %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  mutate(JULIAN_DAY = yday(lubridate::ymd(EVENT_DATE))) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_julian_day = min(JULIAN_DAY, na.rm = TRUE),
    .groups = "drop")

model_julian <- lm(first_julian_day ~ EVENT_YEAR, data = first_juvenile_estuary)
model_summary <- summary(model_julian)
slope <- round(coef(model_julian)["EVENT_YEAR"], 2)
pval <- signif(coef(summary(model_julian))["EVENT_YEAR", "Pr(>|t|)"], 3)

ggplot(first_juvenile_estuary, aes(x = EVENT_YEAR, y = first_julian_day)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 2) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
  scale_x_continuous(breaks = seq(min(first_juvenile_estuary$EVENT_YEAR), max(first_juvenile_estuary$EVENT_YEAR), by = 10)) +
  labs(
    title = "Timing of first juvenile appearance in the Thames Estuary (seine net)",
    subtitle = paste0("Slope = ", slope, " days/year, p = ", pval),
    x = "Year",
    y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 11))

Is there a sampling date bias?

juvenile_thresholds <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  summarise(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    .groups = "drop")

lengths_tagged <- lengths_thames_filtered %>%
  left_join(juvenile_thresholds, by = "SPECIES_NAME") %>%
  mutate(
    is_juvenile = FISH_LENGTH <= juvenile_threshold)

first_juvenile <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_juvenile_day = min(yday(EVENT_DATE)),
    .groups = "drop")

first_sampling <- lengths_thames_filtered %>%
  filter(!is.na(EVENT_DATE)) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_sample_day = min(yday(EVENT_DATE)),
    .groups = "drop")

juvenile_vs_sampling <- left_join(first_juvenile, first_sampling, by = "EVENT_YEAR") %>%
  pivot_longer(cols = c(first_juvenile_day, first_sample_day),
               names_to = "event_type",
               values_to = "julian_day") %>%
  mutate(event_type = recode(event_type,
                             first_juvenile_day = "First juvenile",
                             first_sample_day = "First sample"))

ggplot(juvenile_vs_sampling, aes(x = EVENT_YEAR, y = julian_day, color = event_type)) +
  geom_point(size = 2) +
  geom_line() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed") +
  scale_color_manual(values = c("First juvenile" = "blue", "First sample" = "gray40")) +
  labs(
    title = "Comparison of first juvenile appearance vs first sampling date (seine net)",
    x = "Year",
    y = "Julian day",
    color = "Event type") +
  theme_minimal()

Are there any changes in juvenile arrival by zone?

lengths_juveniles <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    is_juvenile = FISH_LENGTH <= juvenile_threshold) %>%
  ungroup()

first_juvenile_julian <- lengths_juveniles %>%
  filter(is_juvenile, !is.na(EVENT_DATE), !is.na(TOP_TIER_SITE)) %>%
  mutate(JULIAN_DAY = yday(lubridate::ymd(EVENT_DATE))) %>%
  group_by(EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    first_julian_day = min(JULIAN_DAY, na.rm = TRUE),
    .groups = "drop")

julian_models <- first_juvenile_julian %>%
  group_by(TOP_TIER_SITE) %>%
  do(tidy(lm(first_julian_day ~ EVENT_YEAR, data = .))) %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope = round(estimate, 2),
    pval = signif(p.value, 3),
    facet_label = paste0(TOP_TIER_SITE, "\nSlope = ", slope, " days/year, p = ", pval)) %>%
  select(TOP_TIER_SITE, facet_label)

first_juvenile_labeled <- first_juvenile_julian %>%
  left_join(julian_models, by = "TOP_TIER_SITE") %>%
  mutate(
    TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower")),
    facet_label = factor(facet_label, levels = julian_models$facet_label[match(c("Thames Upper", "Thames Middle", "Thames Lower"), julian_models$TOP_TIER_SITE)]))

ggplot(first_juvenile_labeled, aes(x = EVENT_YEAR, y = first_julian_day)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 2) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
  facet_wrap(~ facet_label, scales = "free_y") +
  scale_x_continuous(breaks = seq(min(first_juvenile_labeled$EVENT_YEAR), max(first_juvenile_labeled$EVENT_YEAR), by = 10)) +
  labs(
    title = "Timing of first juvenile appearance by estuary zone (seine net)",
    x = "Year", y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 11),
    legend.position = "none")

# Which species?
lengths_tagged <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    is_juvenile = FISH_LENGTH <= juvenile_threshold,
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE),
    julian_day = yday(EVENT_DATE))

first_juvenile_per_species <- lengths_tagged %>%
  filter(is_juvenile, !is.na(TOP_TIER_SITE)) %>%
  group_by(SPECIES_NAME, EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    first_day = min(julian_day, na.rm = TRUE),
    n_juveniles = n(),
    .groups = "drop") %>%
  filter(n_juveniles >= 5)

species_enough_years <- first_juvenile_per_species %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  summarise(n_years = n(), .groups = "drop") %>%
  filter(n_years >= 5)

first_juvenile_filtered <- first_juvenile_per_species %>%
  semi_join(species_enough_years, by = c("SPECIES_NAME", "TOP_TIER_SITE"))

recruitment_trends <- first_juvenile_filtered %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  do(tidy(lm(first_day ~ EVENT_YEAR, data = .))) %>%
  ungroup() %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope = round(estimate, 2),
    pval = signif(p.value, 3),
    trend = case_when(
      pval < 0.05 & slope < 0 ~ "Earlier (📉)",
      pval < 0.05 & slope > 0 ~ "Later (📈)",
      TRUE ~ "No significant trend"),
    label = paste0(SPECIES_NAME, "\nSlope = ", slope, ", p = ", pval))

recruitment_plot_data <- first_juvenile_filtered %>%
  inner_join(recruitment_trends %>% filter(pval < 0.05),
             by = c("SPECIES_NAME", "TOP_TIER_SITE"))

recruitment_plot_data$TOP_TIER_SITE <- factor(
  recruitment_plot_data$TOP_TIER_SITE,
  levels = c("Thames Upper", "Thames Middle", "Thames Lower"))

ggplot(recruitment_plot_data, aes(x = EVENT_YEAR, y = first_day)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
  facet_grid(TOP_TIER_SITE ~ label, scales = "free_y") +
  labs(
    title = "Species-specific timing of first juvenile appearance (seine net)",
    subtitle = "Only significant trends (p < 0.05)",
    x = "Year", y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(strip.text = element_text(size = 9))

Are there any changes in growth rate?

seasonal_growth <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE)
  ) %>%
  group_by(EVENT_YEAR, EVENT_MONTH) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n = n(),
    .groups = "drop")

growth_rates <- seasonal_growth %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    growth_model = list(lm(mean_length ~ EVENT_MONTH)),
    .groups = "drop") %>%
  mutate(
    slope_mm_per_month = map_dbl(growth_model, ~ coef(.x)[2]))

growth_trend <- lm(slope_mm_per_month ~ EVENT_YEAR, data = growth_rates)
growth_summary <- tidy(growth_trend)

growth_slope <- round(growth_summary$estimate[2], 3)
growth_p <- signif(growth_summary$p.value[2], 3)

ggplot(growth_rates, aes(x = EVENT_YEAR, y = slope_mm_per_month)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed", color = "darkblue") +
  labs(
    title = paste0("Seasonal growth rate over time (seine net)\nSlope = ", growth_slope, " mm/mo/yr, p = ", growth_p),
    x = "Year", y = "Estimated growth rate (mm/month)") +
  theme_minimal()

# Are there any changes in growth rate by zone?
growth_by_zone <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE), !is.na(TOP_TIER_SITE)) %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE)
  ) %>%
  group_by(TOP_TIER_SITE, EVENT_YEAR, EVENT_MONTH) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  group_by(TOP_TIER_SITE, EVENT_YEAR) %>%
  summarise(
    slope_mm_per_month = coef(lm(mean_length ~ EVENT_MONTH))[2],
    .groups = "drop"
  )

growth_trends <- growth_by_zone %>%
  group_by(TOP_TIER_SITE) %>%
  do(tidy(lm(slope_mm_per_month ~ EVENT_YEAR, data = .))) %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope_label = paste0("Slope = ", round(estimate, 2), " mm/mo/yr\np = ", signif(p.value, 2))
  )

zone_labels <- growth_trends %>%
  select(TOP_TIER_SITE, slope_label) %>%
  mutate(
    facet_label = paste0(TOP_TIER_SITE, "\n", slope_label),
    TOP_TIER_SITE = fct_relevel(TOP_TIER_SITE, "Thames Upper", "Thames Middle", "Thames Lower")
  )

growth_by_zone_labeled <- growth_by_zone %>%
  left_join(zone_labels, by = "TOP_TIER_SITE")

growth_by_zone_labeled <- growth_by_zone_labeled %>%
  mutate(
    facet_label = fct_relevel(facet_label, 
                              zone_labels$facet_label[match(c("Thames Upper", "Thames Middle", "Thames Lower"),
                                                            zone_labels$TOP_TIER_SITE)]))

ggplot(growth_by_zone_labeled, aes(x = EVENT_YEAR, y = slope_mm_per_month)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed", color = "blue") +
  facet_wrap(~ facet_label, scales = "free_y") +
  labs(
    title = "Seasonal growth rate over time by estuary zone (seine net)",
    x = "Year", y = "Growth rate (mm/month)"
  ) +
  theme_minimal()

Are there seasonal differences in fish length?

## Nesting season within zones and sites
lengths_seasonal <- lengths_thames_filtered %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    season = case_when(
      EVENT_MONTH %in% 1:6 ~ "Early",
      EVENT_MONTH %in% 7:12 ~ "Late")) %>%
  filter(!is.na(season))

#Estuary
model_lsmean_total <- lm(FISH_LENGTH ~ season + EVENT_YEAR, data = lengths_seasonal)

lsmeans_total <- emmeans(model_lsmean_total, ~ season) %>%
  as.data.frame()

ggplot(lsmeans_total, aes(x = season, y = emmean, fill = season)) +
  geom_col() +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  labs(title = "Fish length by season (seine net)") +
  theme_minimal()

Do these seasonal differences vary by zone or site?

# By zone
model_lsmean_zone <- lm(FISH_LENGTH ~ season * TOP_TIER_SITE + EVENT_YEAR, data = lengths_seasonal)

lsmeans_zone <- emmeans(model_lsmean_zone, ~ season * TOP_TIER_SITE) %>%
  as.data.frame()

ggplot(lsmeans_zone, aes(x = season, y = emmean, fill = season)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  facet_wrap(~ TOP_TIER_SITE, scales = "free_y") +
  labs(title = "Fish length by season and zone (seine net)") +
  theme_minimal()

# By site
model_lsmean_site <- lm(FISH_LENGTH ~ season * SITE_PARENT_NAME + EVENT_YEAR, data = lengths_seasonal)

lsmeans_site <- emmeans(model_lsmean_site, ~ season * SITE_PARENT_NAME) %>%
  as.data.frame()

ggplot(lsmeans_site, aes(x = season, y = emmean, fill = season)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  labs(title = "Fish length by season and site (seine net)") +
  theme_minimal()

Which species show significant seasonal shifts in size?

#Estuary
species_season_diffs <- lengths_seasonal %>%
  group_by(SPECIES_NAME) %>%
  filter(n_distinct(season) >= 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(estimate_diff = estimate, se = SE, pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  coord_flip() +
  labs(title = "Significant seasonal shifts in fish length by species (seine net)") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 7),
    legend.position = "top")

# Zone
species_season_diffs_zone <- lengths_seasonal %>%
  filter(!is.na(TOP_TIER_SITE)) %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  filter(n_distinct(season) == 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(
    estimate_diff = estimate,
    se = SE,
    pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs_zone %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  facet_wrap(~ TOP_TIER_SITE, scales = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  labs(
    title = "Significant seasonal fish length shifts by species and zone (seine net)",
    subtitle = "LSMean length difference (Late − Early), p < 0.05",
    x = "Species",
    y = "Length shift (mm)",
    fill = "Direction") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 7),
    legend.position = "top")

# Site
species_season_diffs_site <- lengths_seasonal %>%
  filter(!is.na(SITE_PARENT_NAME)) %>%
  group_by(SPECIES_NAME, SITE_PARENT_NAME) %>%
  filter(n_distinct(season) == 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(
    estimate_diff = estimate,
    se = SE,
    pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs_site %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  labs(
    title = "Significant seasonal fish length shifts by site and species",
    subtitle = "LSMean length difference (Late − Early), p < 0.05",
    x = "Species",
    y = "Length shift (mm)",
    fill = "Direction") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.y = element_text(size = 6),
    legend.position = "top")

Functional guilds

# Load data
guilds <- read_csv("species_list_guilds.csv")

# Summarise
e_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, ECOLOGY_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
e_guild_trends <- e_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(ECOLOGY_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
e_guild_long <- e_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(e_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Ecology guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
e_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = e_guild_trends, family = poisson())
e_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = e_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
e_results <- bind_rows(
  summary_table(e_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(e_lm_shannon,     "Shannon Diversity"),
  summary_table(e_lm_simpson,     "Simpson Diversity"),
  summary_table(e_lm_margalef,    "Margalef Richness"),
  summary_table(e_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(e_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in ecology guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
e_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, ECOLOGY_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

e_guild_trends_site <- e_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(ECOLOGY_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

e_guild_trends_site_long <- e_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
e_guild_site_results <- e_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

e_guild_site_results <- e_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(e_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in ecology guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Summarise
f_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, FEEDING_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
f_guild_trends <- f_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(FEEDING_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
f_guild_long <- f_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(f_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Feeding guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
f_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = f_guild_trends, family = poisson())
f_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = f_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
f_results <- bind_rows(
  summary_table(f_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(f_lm_shannon,     "Shannon Diversity"),
  summary_table(f_lm_simpson,     "Simpson Diversity"),
  summary_table(f_lm_margalef,    "Margalef Richness"),
  summary_table(f_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(f_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in feeding guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
f_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, FEEDING_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

f_guild_trends_site <- f_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(FEEDING_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

f_guild_trends_site_long <- f_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
f_guild_site_results <- f_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

f_guild_site_results <- f_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(f_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in feeding guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Summarise
r_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, REPRODUCTIVE_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
r_guild_trends <- r_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(REPRODUCTIVE_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
r_guild_long <- r_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(r_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Reproductive guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
r_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = r_guild_trends, family = poisson())
r_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = r_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
r_results <- bind_rows(
  summary_table(r_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(r_lm_shannon,     "Shannon Diversity"),
  summary_table(r_lm_simpson,     "Simpson Diversity"),
  summary_table(r_lm_margalef,    "Margalef Richness"),
  summary_table(r_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(r_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in reproductive guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
r_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, REPRODUCTIVE_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

r_guild_trends_site <- r_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(REPRODUCTIVE_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

r_guild_trends_site_long <- r_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
r_guild_site_results <- r_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

r_guild_site_results <- r_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(r_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in reproductive guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Functional redundancy

# Replace missing guild values to allow safe concatenation
guilds <- guilds %>%
  mutate(across(
    c(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD),
    ~replace_na(., "NA")
  ))

# Create functional entity by combining all guilds
guilds <- guilds %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_"))

# Count distinct species per functional entity per year
redundancy <- guilds %>%
  group_by(EVENT_DATE_YEAR, FUNCTIONAL_ENTITY) %>%
  summarise(
    Species_Richness = n_distinct(LATIN_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    .groups = "drop"
  )

# Summarise 
redundancy_summary <- redundancy %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Redundant_Functions = sum(Species_Richness > 1),
    Mean_Species_per_Function = mean(Species_Richness),
    Total_Species = sum(Species_Richness),
    .groups = "drop"
  )

# Pivot 
redundancy_long <- redundancy_summary %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Metric", values_to = "Value")

# Plot 
ggplot(redundancy_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, linewidth = 1, color = "tomato") +
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Functional redundancy metrics over time - Thames Estuary",
    subtitle = "Based on combinations of ecological, feeding, vertical and reproductive guilds",
    x = "Year", y = "Metric value"
  ) +
  theme_minimal()

# Build species × trait binary matrix 
traits_matrix <- guilds %>%
  filter(!is.na(LATIN_NAME)) %>%                          
  select(LATIN_NAME, ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD) %>%
  distinct() %>%
  mutate(across(everything(), as.factor)) %>%
  pivot_longer(cols = -LATIN_NAME, names_to = "TraitType", values_to = "TraitValue") %>%
  mutate(Present = 1) %>%
  pivot_wider(names_from = c(TraitType, TraitValue), values_from = Present, values_fill = 0) %>%
  filter(!is.na(LATIN_NAME)) %>%                          
  column_to_rownames("LATIN_NAME")

abundance_matrix <- guilds %>%
  group_by(EVENT_DATE_YEAR, LATIN_NAME) %>%
  summarise(Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = LATIN_NAME, values_from = Abundance, values_fill = 0) %>%
  column_to_rownames("EVENT_DATE_YEAR")

shared_species <- intersect(rownames(traits_matrix), colnames(abundance_matrix))

# Filter 
traits_matrix_filtered <- traits_matrix[shared_species, , drop = FALSE]
abundance_matrix_filtered <- abundance_matrix[, shared_species, drop = FALSE]

fd_results <- dbFD(
  x = traits_matrix_filtered,
  a = abundance_matrix_filtered,
  calc.FRic = TRUE,
  m = "min",           
  stand.x = TRUE)
## FRic: Dimensionality reduction was required. The last 14 PCoA axes (out of 16 in total) were removed.
## FRic: Quality of the reduced-space representation = 0.3169416
# Convert to dataframe 
fd_df <- as.data.frame(fd_results[c("FRic", "FEve", "FDiv")])
fd_df$EVENT_DATE_YEAR <- as.numeric(rownames(fd_df))

fd_long <- fd_df %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Index", values_to = "Value")

# Plot
ggplot(fd_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, color = "tomato") +
  facet_wrap(~ Index, scales = "free_y") +
  labs(
    title = "Functional diversity indices - Thames Estuary",
    subtitle = "FRic (richness), FEve (evenness), FDiv (divergence)",
    x = "Year", y = "Value"
  ) +
  theme_minimal()

# Per zone and year
functional_redundancy_zone <- guilds %>%
  filter(!is.na(TOP_TIER_SITE)) %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_")) %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, FUNCTIONAL_ENTITY) %>%
  summarise(Species_Richness = n_distinct(LATIN_NAME), .groups = "drop")

redundancy_zone_summary <- functional_redundancy_zone %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE) %>%
  summarise(
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Redundant_Functions = sum(Species_Richness > 1),
    Mean_Species_per_Function = mean(Species_Richness),
    Total_Species = sum(Species_Richness),
    .groups = "drop"
  )

# Long
redundancy_zone_long <- redundancy_zone_summary %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR, TOP_TIER_SITE), names_to = "Metric", values_to = "Value")

# Model 
site_redundancy_model <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) return(empty_result)
  model <- tryCatch(lm(Value ~ EVENT_DATE_YEAR, data = df), error = function(e) return(NULL))
  if (is.null(model)) return(empty_result)
  s <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(s)) return(empty_result)
  slope <- s["EVENT_DATE_YEAR", "Estimate"]
  se <- s["EVENT_DATE_YEAR", "Std. Error"]
  p <- s["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***", p <= 0.01 ~ "**", p <= 0.05 ~ "*", TRUE ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

site_redundancy_results <- redundancy_zone_long %>%
  group_by(TOP_TIER_SITE, Metric) %>%
  group_modify(~ site_redundancy_model(.x)) %>%
  ungroup() %>%
  filter(!is.na(Slope))

# Plot
ggplot(site_redundancy_results, aes(x = Slope, y = reorder(TOP_TIER_SITE, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Functional redundancy trends by zone - Thames Estuary",
    subtitle = "Slope ± 95% CI | Linear models of site-level functional metrics",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Per site and year
functional_redundancy_site <- guilds %>%
  filter(!is.na(SITE_PARENT_NAME)) %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_")) %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, FUNCTIONAL_ENTITY) %>%
  summarise(Species_Richness = n_distinct(LATIN_NAME), .groups = "drop")

redundancy_site_summary <- functional_redundancy_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Redundant_Functions = sum(Species_Richness > 1),
    Mean_Species_per_Function = mean(Species_Richness),
    Total_Species = sum(Species_Richness),
    .groups = "drop"
  )

# Long
redundancy_site_long <- redundancy_site_summary %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR, SITE_PARENT_NAME), names_to = "Metric", values_to = "Value")

# Model 
site_redundancy_model <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) return(empty_result)
  model <- tryCatch(lm(Value ~ EVENT_DATE_YEAR, data = df), error = function(e) return(NULL))
  if (is.null(model)) return(empty_result)
  s <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(s)) return(empty_result)
  slope <- s["EVENT_DATE_YEAR", "Estimate"]
  se <- s["EVENT_DATE_YEAR", "Std. Error"]
  p <- s["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***", p <= 0.01 ~ "**", p <= 0.05 ~ "*", TRUE ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

site_redundancy_results <- redundancy_site_long %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_redundancy_model(.x)) %>%
  ungroup() %>%
  filter(!is.na(Slope))

# Plot
ggplot(site_redundancy_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Functional redundancy trends by site - Thames Estuary",
    subtitle = "Slope ± 95% CI | Linear models of site-level functional metrics",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

Sites overview

Which are the common species across sites?

top_common_species <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(SITE_PARENT_NAME)) %>%
  count(SPECIES_NAME, SITE_PARENT_NAME) %>%
  group_by(SPECIES_NAME) %>%
  summarise(n_sites = n_distinct(SITE_PARENT_NAME), .groups = "drop") %>%
  arrange(desc(n_sites)) %>%
  pull(SPECIES_NAME)

site_species_list <- counts_thames_filtered %>%
  filter(SPECIES_NAME %in% top_common_species) %>%
  distinct(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    species_list = paste(sort(unique(SPECIES_NAME)), collapse = ", "),
    n_species = n(),
    .groups = "drop")

site_species_coords <- site_species_list %>%
  left_join(
    sites %>% distinct(SITE_PARENT_NAME, TOP_TIER_SITE, SITE_RANKED_EASTING, SITE_RANKED_NORTHING),
    by = "SITE_PARENT_NAME") %>%
  filter(!is.na(SITE_RANKED_EASTING))

site_species_sf <- site_species_coords %>%
  st_as_sf(coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(4326)

site_species_sf$lon <- st_coordinates(site_species_sf)[, 1]
site_species_sf$lat <- st_coordinates(site_species_sf)[, 2]

leaflet(site_species_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    radius = ~scales::rescale(n_species, to = c(4, 10)),
    fillColor = ~colorNumeric("Blues", domain = site_species_sf$n_species)(n_species),
    fillOpacity = 0.8,
    stroke = TRUE,
    color = "black",
    popup = ~paste0(
      "<b>Site:</b> ", SITE_PARENT_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Species Detected:</b> ", n_species, " / ", length(top_common_species), "<br>",
      "<b>Species List:</b><br>", species_list)) %>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric("Blues", domain = site_species_sf$n_species),
    values = ~n_species,
    title = "Top Species Detected",
    opacity = 1)
site_species_table <- site_species_coords %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, n_species, species_list) %>%
  arrange(desc(n_species)) %>%
  gt() %>%
  tab_header(
    title = "Top common species detected per site",
    subtitle = paste0("Based on presence of top ", length(top_common_species), " most widespread species")
  ) %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE = "Estuary Zone",
    n_species = "No. of Top Species",
    species_list = "Species Detected"
  ) %>%
  fmt_number(columns = n_species, decimals = 0) %>%
  tab_options(
    table.font.size = "small",
    data_row.padding = px(4),
    column_labels.font.weight = "bold"
  ) %>%
  data_color(
    columns = n_species,
    colors = scales::col_numeric(palette = "Blues", domain = NULL)
  )

site_species_table
Top common species detected per site
Based on presence of top 58 most widespread species
Site Estuary Zone No. of Top Species Species Detected
West Thurrock Thames Middle 31 3-spined stickleback, 5-bearded rockling, Big scaled sand smelt, Cod, Common bream, Common goby, Dab, Dover sole, European eel, Flounder, Goby sp., Golden grey mullet, Greater pipefish, Grey mullet sp., Herring, Hooknose / Pogge, Lesser (Nillsons) pipefish, Long-spined sea scorpion, Perch, Plaice, Red mullet, Roach, Sand goby, Sand smelt, Sandeel sp., Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Transparent goby
Richmond Thames Upper 29 3-spined stickleback, Atlantic salmon, Barbel, Big scaled sand smelt, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common carp varieties, Common goby, Dace, European eel, European eels > elvers, Flounder, Goby sp., Gudgeon, Minnow, Mirror carp, Perch, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Smelt, Stickleback sp., Thin lipped grey mullet
Greenwich Thames Middle 28 3-spined stickleback, Big scaled sand smelt, Brown / sea trout, Chub, Common bream, Common goby, Dace, Dover sole, European eel, European eels > elvers, Flounder, Gilthead bream, Grey mullet sp., Herring, Perch, Pike, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Short-snouted seahorse, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Zander
Kew Thames Upper 27 3-spined stickleback, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, European elvers, Flounder, Goby sp., Gudgeon, Mirror carp, Perch, Pike, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Stone loach, Thick lipped grey mullet, Thin lipped grey mullet, Zander
Battersea Thames Upper 26 3-spined stickleback, Big scaled sand smelt, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, European elvers, Flounder, Goby sp., Gudgeon, Perch, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet, Zander
Chiswick Thames Upper 25 3-spined stickleback, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, Flounder, Goby sp., Grey mullet sp., Minnow, Perch, Pike, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Stone loach, Thin lipped grey mullet
Denton Wharf Thames Middle 18 Anchovy, Common goby, Dover sole, European eel, European eels > elvers, Flounder, Greater pipefish, Grey mullet sp., Herring, Herring like sp., Plaice, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet, Transparent goby
Hammersmith Thames Upper 18 10-spined stickleback, 3-spined stickleback, Bleak, Common bream, Common carp varieties, Common goby, Dace, European eel, Flounder, Perch, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Tench varieties, Thin lipped grey mullet
Greenhithe Thames Middle 17 3-bearded rockling, 3-spined stickleback, Common goby, Dover sole, European eel, Flounder, Herring, Long-spined sea scorpion, Perch, Plaice, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet
Brentford Thames Upper 16 3-spined stickleback, Bleak, Brown / sea trout, Chub, Common bream, Common goby, Dace, European eel, Flounder, Gudgeon, Perch, Roach, Sand smelt, Sea bass, Smelt, Thin lipped grey mullet
Stanford-le-Hope Beach Thames Lower 15 Anchovy, Common goby, Dover sole, European eel, Flounder, Herring, Painted goby, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Transparent goby
Thamesmead Thames Middle 13 3-spined stickleback, Common bream, Common goby, Dace, European eel, Flounder, Roach, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet
Chelsea Creek Thames Upper 12 Common bream, Common carp varieties, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand smelt, Sea bass, Smelt, Thin lipped grey mullet
Putney Thames Upper 11 3-spined stickleback, Brown / sea trout, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand goby, Sand smelt, Sea bass
Vauxhall Thames Middle 11 3-spined stickleback, Atlantic salmon, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand goby, Sand smelt, Sea bass
Teddington Weir Thames Upper 10 3-spined stickleback, Bleak, Common bream, Common goby, Dace, Flounder, Gudgeon, Perch, Roach, Sea bass
Chelsea Thames Upper 8 3-spined stickleback, Common bream, Dace, Flounder, Roach, Sand goby, Sand smelt, Sea bass
Mucking Thames Middle 8 European eels > elvers, Flounder, Herring, Sand smelt, Sea bass, Smelt, Thick lipped grey mullet, Thin lipped grey mullet
Fulham Thames Upper 5 3-spined stickleback, Dace, European eel, Flounder, Roach
Isleworth Thames Upper 5 3-spined stickleback, Dace, Gudgeon, Perch, Roach
Petersham Thames Upper 4 3-spined stickleback, Dace, Perch, Roach

Nursery score

The nursery score is a composite metric designed to evaluate the importance of sites as nursery habitats for juvenile fish. It integrates three key components: the mean length of all fish caught at a site, the mean length of juveniles, and the total number of juvenile individuals observed.

First, juvenile fish were identified for each species using a species-specific threshold, defined as the 25th percentile of lengths for that species across all sites. Then, for each site, the average total fish length, average juvenile length, and juvenile abundance were calculated. These three variables were rescaled to a 0–1 range (with lower mean lengths and higher juvenile counts contributing to higher scores), and the final nursery score was computed as the average of the three rescaled values.

This score highlights sites where juvenile fish are both abundant and relatively small.

site_length_stats <- lengths_thames_filtered %>%
  group_by(SITE_PARENT_NAME, TOP_TIER_SITE) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop")

lengths_tagged <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(is_juvenile = FISH_LENGTH <= juvenile_threshold)

juvenile_site_stats <- lengths_tagged %>%
  filter(is_juvenile) %>%
  group_by(SITE_PARENT_NAME, TOP_TIER_SITE) %>%
  summarise(
    mean_juvenile_length = mean(FISH_LENGTH, na.rm = TRUE),
    n_juveniles = n(),
    .groups = "drop")

nursery_combined <- site_length_stats %>%
  inner_join(juvenile_site_stats, by = c("SITE_PARENT_NAME", "TOP_TIER_SITE")) %>%
  mutate(
    score_length = rescale(-mean_length, to = c(0, 1)),
    score_juvenile_size = rescale(-mean_juvenile_length, to = c(0, 1)),
    score_juvenile_count = rescale(n_juveniles, to = c(0, 1)),
    nursery_score = round((score_length + score_juvenile_size + score_juvenile_count) / 3, 3)) %>%
  arrange(desc(nursery_score))

sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")

sites_sf <- sites %>%
  filter(!is.na(SITE_RANKED_EASTING), !is.na(SITE_RANKED_NORTHING)) %>%
  st_as_sf(coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326) %>%
  mutate(
    lon = st_coordinates(.)[, 1],
    lat = st_coordinates(.)[, 2]
  ) %>%
  select(SITE_PARENT_NAME, lat, lon)

nursery_map <- nursery_combined %>%
  left_join(sites_sf, by = "SITE_PARENT_NAME") %>%
  filter(!is.na(lat), !is.na(lon))

leaflet(nursery_map) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    radius = ~rescale(nursery_score, to = c(5, 12)),  
    fillColor = ~colorNumeric("YlOrRd", domain = nursery_map$nursery_score)(nursery_score),
    fillOpacity = 0.95,
    color = "black", stroke = TRUE, weight = 0.8,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_PARENT_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b><span style='color:darkred'>Nursery Score:</span></b> ", nursery_score, "<br>",
      "<b>Mean Length:</b> ", round(mean_length, 1), " mm<br>",
      "<b>Juvenile Length:</b> ", round(mean_juvenile_length, 1), " mm<br>",
      "<b>Juvenile Count:</b> ", n_juveniles
    )
  ) %>%
  addLegend(
    "bottomright",
    pal = colorNumeric("YlOrRd", domain = nursery_map$nursery_score),
    values = nursery_map$nursery_score,
    title = "Nursery Score (0–1)",
    opacity = 1
  )
nursery_combined %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, mean_length, mean_juvenile_length, n_juveniles, nursery_score) %>%
  arrange(desc(nursery_score)) %>%
  gt() %>%
  tab_header(
    title = "Nursery Site Ranking",
    subtitle = "Based on fish length, juvenile size, and juvenile abundance"
  ) %>%
  fmt_number(
    columns = c(mean_length, mean_juvenile_length, nursery_score), decimals = 1
  ) %>%
  fmt_number(columns = n_juveniles, decimals = 0) %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE = "Estuary Zone",
    mean_length = "Mean Length (mm)",
    mean_juvenile_length = "Juvenile Length (mm)",
    n_juveniles = "Juvenile Count",
    nursery_score = "Nursery Score"
  ) %>%
  data_color(
    columns = nursery_score,
    colors = scales::col_numeric(palette = "YlOrRd", domain = NULL)
  ) %>%
  tab_options(
    table.font.size = "small",
    data_row.padding = px(5)
  )
Nursery Site Ranking
Based on fish length, juvenile size, and juvenile abundance
Site Estuary Zone Mean Length (mm) Juvenile Length (mm) Juvenile Count Nursery Score
Richmond Thames Upper 53.1 30.5 2,312 0.7
Putney Thames Upper 31.8 20.8 80 0.6
Fulham Thames Upper 26.9 22.3 14 0.6
Chiswick Thames Upper 46.9 30.4 1,098 0.6
Chelsea Thames Upper 33.9 26.1 95 0.6
Hammersmith Thames Upper 38.3 27.8 407 0.6
Battersea Thames Upper 50.6 29.5 926 0.5
Brentford Thames Upper 42.7 30.7 669 0.5
Greenwich Thames Middle 54.5 35.9 1,336 0.5
Thamesmead Thames Middle 41.4 29.0 159 0.5
Kew Thames Upper 59.0 35.1 1,152 0.5
Vauxhall Thames Middle 46.7 31.1 31 0.4
Teddington Weir Thames Upper 53.7 32.0 53 0.4
Greenhithe Thames Middle 51.1 37.5 133 0.3
West Thurrock Thames Middle 61.0 39.3 671 0.3
Denton Wharf Thames Middle 67.0 38.2 206 0.2
Stanford-le-Hope Beach Thames Lower 65.9 40.4 192 0.2
Chelsea Creek Thames Upper 62.4 46.8 211 0.2
Mucking Thames Middle 83.7 43.2 29 0.0

# Convert to sf object
sites_sf <- st_as_sf(sites, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)

coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

richness_trends <- site_results_clean %>%
  filter(Metric == "Species_Richness") %>%
  select(SITE_PARENT_NAME, Slope, Significance)

# Add direction category
richness_trends <- richness_trends %>%
  mutate(Richness_Trend = case_when(
    Significance != "" & Slope > 0 ~ "Increasing",
    Significance != "" & Slope < 0 ~ "Decreasing",
    TRUE ~ "No Trend"
  ))

juvenile_hotspots <- nursery_combined %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, score_length, nursery_score, n_juveniles)

site_overlay_data <- sites_sf %>%
  left_join(richness_trends, by = "SITE_PARENT_NAME") %>%
  left_join(juvenile_hotspots, by = "SITE_PARENT_NAME")

# Diversity
diversity_summary <- diversity_site_year %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    Species_Richness = mean(Species_Richness, na.rm = TRUE),
    Shannon_Diversity = mean(Shannon_Diversity, na.rm = TRUE),
    Simpson_Diversity = mean(Simpson_Diversity, na.rm = TRUE),
    Margalef_Richness = mean(Margalef_Richness, na.rm = TRUE),
    Pielou_Evenness = mean(Pielou_Evenness, na.rm = TRUE),
    Total_Abundance = mean(Total_Abundance, na.rm = TRUE),
    .groups = "drop"
  )

# Combine
site_rankings_full <- site_overlay_data %>%
  st_drop_geometry() %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE.x, nursery_score, n_juveniles) %>%
  filter(!is.na(nursery_score)) %>%
  left_join(diversity_summary, by = "SITE_PARENT_NAME") %>%
  mutate(
    nursery_score = round(nursery_score, 2),
    across(c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Total_Abundance),
           ~ round(.x, 2))
  )

# Rank
site_rankings_full %>%
  distinct(SITE_PARENT_NAME, .keep_all = TRUE) %>%
  mutate(
    TOP_TIER_SITE.x = factor(TOP_TIER_SITE.x,
                             levels = c("Thames Upper", "Thames Middle", "Thames Lower"))
  ) %>%
  arrange(TOP_TIER_SITE.x) %>%
  rename(
    `Nursery Score` = nursery_score,
    `Juvenile Abundance` = n_juveniles,
    `Total Abundance` = Total_Abundance
  ) %>%
  gt() %>%
  tab_header(title = "Site rankings by juvenile and diversity metrics") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE.x = "Zone",
    `Nursery Score` = "Nursery score",
    Species_Richness = "Richness",
    Shannon_Diversity = "Shannon",
    Simpson_Diversity = "Simpson",
    Margalef_Richness = "Margalef",
    Pielou_Evenness = "Evenness",
    `Juvenile Abundance` = "Juvenile abundance",
    `Total Abundance` = "Total abundance"
  ) %>%
  data_color(
    columns = vars(
      `Nursery Score`,
      Species_Richness,
      Shannon_Diversity,
      Simpson_Diversity,
      Margalef_Richness,
      Pielou_Evenness,
      `Juvenile Abundance`,
      `Total Abundance`
    ),
    colors = scales::col_numeric(palette = "Oranges", domain = NULL)
  )
Site rankings by juvenile and diversity metrics
Site Zone Nursery score Juvenile abundance Richness Shannon Simpson Margalef Evenness Total abundance
Battersea Thames Upper 0.55 926 8.35 1.98 0.83 1.49 0.96 213.26
Brentford Thames Upper 0.54 669 6.62 1.75 0.80 1.11 0.98 174.25
Chelsea Creek Thames Upper 0.15 211 7.50 1.86 0.82 1.34 0.99 322.00
Chelsea Thames Upper 0.57 95 8.00 2.02 0.86 1.39 0.97 152.00
Chiswick Thames Upper 0.58 1098 8.48 2.02 0.85 1.70 0.96 142.15
Fulham Thames Upper 0.65 14 5.00 1.61 0.80 1.23 1.00 26.00
Hammersmith Thames Upper 0.57 407 7.57 1.91 0.83 1.38 0.97 156.86
Kew Thames Upper 0.46 1152 8.67 2.03 0.85 1.46 0.96 303.63
Putney Thames Upper 0.65 80 7.00 1.80 0.81 1.47 0.98 68.00
Richmond Thames Upper 0.72 2312 8.97 2.05 0.85 1.32 0.96 611.71
Teddington Weir Thames Upper 0.37 53 5.67 1.70 0.81 1.05 0.99 115.00
Greenhithe Thames Middle 0.33 133 11.00 2.29 0.89 2.10 0.97 129.33
Greenwich Thames Middle 0.50 1336 10.14 2.21 0.88 1.89 0.96 177.93
Mucking Thames Middle 0.05 29 4.33 1.43 0.75 0.83 0.98 62.00
Thamesmead Thames Middle 0.50 159 10.00 2.25 0.89 1.76 0.98 165.00
Vauxhall Thames Middle 0.42 31 7.50 1.91 0.83 1.77 0.96 38.50
West Thurrock Thames Middle 0.32 671 9.54 2.13 0.87 1.57 0.96 284.58
Denton Wharf Thames Middle 0.23 206 8.90 2.07 0.86 1.60 0.96 214.50
Stanford-le-Hope Beach Thames Lower 0.21 192 7.38 1.89 0.83 1.35 0.95 288.75

Which sites are ecologically rich and function well as nurseries?

The composite score represents an integrated index that ranks sites based on a combination of ecological and nursery-related metrics. It is calculated by first selecting a suite of indicators—namely nursery score, species richness, Shannon and Simpson diversity indices, Margalef richness, Pielou evenness, juvenile abundance, total fish abundance, and the number of widespread species observed per site.

To ensure comparability across these differently scaled variables, each metric is rescaled to a 0–1 range. The composite score is then derived by averaging these rescaled values, resulting in a balanced metric where higher scores indicate sites that perform well both in terms of biodiversity and nursery function.

top_common_species <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(SITE_PARENT_NAME)) %>%
  count(SPECIES_NAME, SITE_PARENT_NAME) %>%
  group_by(SPECIES_NAME) %>%
  summarise(n_sites = n_distinct(SITE_PARENT_NAME), .groups = "drop") %>%
  arrange(desc(n_sites)) %>%
  pull(SPECIES_NAME)

site_species_list <- counts_thames_filtered %>%
  filter(SPECIES_NAME %in% top_common_species) %>%
  distinct(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    species_list = paste(sort(unique(SPECIES_NAME)), collapse = ", "),
    n_species = n(),
    .groups = "drop")

site_rankings_with_species <- site_rankings_full %>%
  left_join(
    site_species_list %>% select(SITE_PARENT_NAME, n_species),
    by = "SITE_PARENT_NAME"
  )

site_rankings_scaled <- site_rankings_with_species %>%
  mutate(across(
    c(nursery_score, Species_Richness, Shannon_Diversity, Simpson_Diversity,
      Margalef_Richness, Pielou_Evenness, n_juveniles, Total_Abundance, n_species),
    ~ rescale(.x, to = c(0, 1), na.rm = TRUE),
    .names = "scaled_{.col}"
  )) %>%
  rowwise() %>%
  mutate(
    composite_score = mean(c_across(starts_with("scaled_")), na.rm = TRUE)
  ) %>%
  ungroup()

top_3_sites <- site_rankings_scaled %>%
  group_by(TOP_TIER_SITE.x) %>%
  slice_max(composite_score, n = 7, with_ties = FALSE) %>%
  arrange(TOP_TIER_SITE.x, desc(composite_score)) %>%
  ungroup()

top_3_sites %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE.x, composite_score) %>%
  rename(
    Site = SITE_PARENT_NAME,
    Zone = TOP_TIER_SITE.x,
    `Composite Score` = composite_score
  ) %>%
  distinct(Site, .keep_all = TRUE) %>%
  arrange(desc(`Composite Score`)) %>%
  mutate(Rank = row_number()) %>%
  select(Rank, everything()) %>%
  gt() %>%
  tab_header(title = "Top 3 sites per estuary zone") %>%
  fmt_number(columns = `Composite Score`, decimals = 2) %>%
  data_color(
    columns = `Composite Score`,
    colors = scales::col_numeric(palette = "Oranges", domain = NULL)
  ) %>%
  tab_options(table.font.size = "small", data_row.padding = px(4))
Top 3 sites per estuary zone
Rank Site Zone Composite Score
1 Richmond Thames Upper 0.74
2 Greenwich Thames Middle 0.68
3 Greenhithe Thames Middle 0.61
4 Thamesmead Thames Middle 0.60
5 Kew Thames Upper 0.58
6 Chiswick Thames Upper 0.57
7 Stanford-le-Hope Beach Thames Lower 0.35

Habitat quality index

The Habitat Quality Index was calcualted based on species richness, total fish abundance, distinct functional entities (unique functional roles represented), mean redundancy (average number of species performing the same ecological function), and number of redundant functions (functions supported by more than one species). To generate a composite score representing habitat quality, the values were scaled and summarised.

# zone
counts_guilds <- counts_thames_filtered %>%
  select(TOP_TIER_SITE, LATIN_NAME) %>%
  left_join(guilds, by = "LATIN_NAME") %>%
  filter(!is.na(SPECIES_NAME) & !is.na(FISH_COUNT)) %>%
  mutate(EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

counts_guilds <- counts_guilds %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_"))

site_scores <- counts_guilds %>%
  group_by(TOP_TIER_SITE.y, EVENT_DATE_YEAR) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Mean_Redundancy = mean(table(FUNCTIONAL_ENTITY)),
    Redundant_Functions = sum(table(FUNCTIONAL_ENTITY) > 1),
    .groups = "drop")

site_mean_scores <- site_scores %>%
  group_by(TOP_TIER_SITE.y) %>%
  summarise(across(Species_Richness:Redundant_Functions, mean, na.rm = TRUE), .groups = "drop")

site_scaled <- site_mean_scores %>%
  mutate(across(Species_Richness:Redundant_Functions,
                ~ (. - min(.)) / (max(.) - min(.)), .names = "scaled_{.col}"))

# Composite habitat quality index
site_scaled <- site_scaled %>%
  rowwise() %>%
  mutate(Habitat_Quality_Index = mean(c_across(starts_with("scaled_")))) %>%
  ungroup()

# Rank
site_scaled <- site_scaled %>%
  select(TOP_TIER_SITE.y, Habitat_Quality_Index) %>%
  arrange(desc(Habitat_Quality_Index))

# Plot
ggplot(site_scaled, aes(x = reorder(TOP_TIER_SITE.y, Habitat_Quality_Index),
                        y = Habitat_Quality_Index)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Habitat quality index by zone",
       x = "Zone", y = "Composite habitat quality score (0–1)") +
  theme_minimal()

# Site
counts_guilds <- counts_thames_filtered %>%
  select(SITE_PARENT_NAME, LATIN_NAME) %>%
  left_join(guilds, by = "LATIN_NAME") %>%
  filter(!is.na(SPECIES_NAME) & !is.na(FISH_COUNT)) %>%
  mutate(EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

counts_guilds <- counts_guilds %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_"))

site_scores <- counts_guilds %>%
  group_by(SITE_PARENT_NAME.y, EVENT_DATE_YEAR) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Mean_Redundancy = mean(table(FUNCTIONAL_ENTITY)),
    Redundant_Functions = sum(table(FUNCTIONAL_ENTITY) > 1),
    .groups = "drop")

site_mean_scores <- site_scores %>%
  group_by(SITE_PARENT_NAME.y) %>%
  summarise(across(Species_Richness:Redundant_Functions, mean, na.rm = TRUE), .groups = "drop")

site_scaled <- site_mean_scores %>%
  mutate(across(Species_Richness:Redundant_Functions,
                ~ (. - min(.)) / (max(.) - min(.)), .names = "scaled_{.col}"))

# Composite habitat quality index
site_scaled <- site_scaled %>%
  rowwise() %>%
  mutate(Habitat_Quality_Index = mean(c_across(starts_with("scaled_")))) %>%
  ungroup()

# Rank
site_scaled <- site_scaled %>%
  select(SITE_PARENT_NAME.y, Habitat_Quality_Index) %>%
  arrange(desc(Habitat_Quality_Index))

# Plot
ggplot(site_scaled, aes(x = reorder(SITE_PARENT_NAME.y, Habitat_Quality_Index),
                        y = Habitat_Quality_Index)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Habitat quality index by site",
       x = "Site", y = "Composite habitat quality score (0–1)") +
  theme_minimal()

Sampling sites

Sites overview ArcGIS map

setwd("C:/Users/bodna/OneDrive - University College London/Documents/PhD/PhD docs")

site_data <- read_excel("locations.xlsx")

valid_types <- c("Natural", "Degraded", "Restored", "Engineered")
site_data <- site_data %>%
  filter(Type %in% valid_types, !is.na(Lat), !is.na(Long))

m <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron)

for (site_type in valid_types) {
  type_data <- site_data %>% filter(Type == site_type)
  color <- case_when(
    site_type == "Natural" ~ "green",
    site_type == "Degraded" ~ "red",
    site_type == "Restored" ~ "orange",
    site_type == "Engineered" ~ "blue"
  )
  
  m <- m %>%
    addCircleMarkers(
      data = type_data,
      lng = ~Long, lat = ~Lat,
      group = site_type,
      radius = 6,
      color = color,
      fillOpacity = 0.8,
      label = ~Site,
      popup = ~paste0(
        "<b>", Site, "</b><br>",
        "Habitat: ", Habitat, "<br>",
        "Type: ", Type, "<br>",
        "Salinity: ", Salinity, "<br>",
        "Access: ", Access, "<br>",
        "Oversight: ", Oversight
      )
    )
}

m <- m %>%
  addLayersControl(
    overlayGroups = valid_types,
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("green", "red", "orange", "blue"),
    labels = valid_types,
    title = "Site type")

m

Top EA sites

top_sites_per_zone <- counts_thames_filtered %>%
  group_by(TOP_TIER_SITE, SITE_PARENT_NAME) %>%
  summarise(total_samples = n(), .groups = "drop") %>%
  filter(!is.na(TOP_TIER_SITE)) %>%
  mutate(TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower"))) %>%
  group_by(TOP_TIER_SITE) %>%
  slice_max(order_by = total_samples, n = 3, with_ties = FALSE) %>%
  arrange(TOP_TIER_SITE, desc(total_samples))

top_sites_per_zone %>%
  gt() %>%
  tab_header(
    title = "Top 3 most-sampled sites per estuarine zone") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE = "Zone",
    total_samples = "Total number of samples")
Top 3 most-sampled sites per estuarine zone
Site Total number of samples
Thames Upper
Richmond 458
Battersea 433
Kew 401
Thames Middle
Greenwich 488
West Thurrock 432
Denton Wharf 182
Thames Lower
Stanford-le-Hope Beach 125
top_sites_data <- counts_thames_filtered %>%
  semi_join(top_sites_per_zone, by = c("TOP_TIER_SITE", "SITE_PARENT_NAME"))
# Diversity
diversity_site_year <- top_sites_data %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
    Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
    EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

# Reshape
diversity_long_site <- diversity_site_year %>%
  pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
                        Margalef_Richness, Pielou_Evenness, Total_Abundance),
               names_to = "Metric", values_to = "Value")

# Model
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply per group
site_results <- diversity_long_site %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

site_results_clean <- site_results %>%
  filter(!is.na(Slope))

# Plot
ggplot(site_results_clean, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in diversity metrics over time by site - Thames Estuary",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Site
species_site_trends <- top_sites_data %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Model
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

species_site_results <- species_site_trends %>%
  group_by(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

species_site_results_clean <- species_site_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_site_results_clean,
       aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", nrow = 2) +
  labs(
    title = "Species-level abundance trends by site",
    subtitle = "Slopes from per-site linear models ± 95% CI",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    color = "Significance",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal(base_size = 12) +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 6))

# Site
fish_clean <- top_sites_data %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

site_list <- unique(fish_clean$SITE_PARENT_NAME)

site_beta_trends <- list()

# Model
for (site in site_list) {
  cat("Processing site:", site, "\n")
  
  site_data <- fish_clean %>%
    filter(SITE_PARENT_NAME == site)
  
  site_pa <- site_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  if (nrow(site_pa) >= 3) {
    site_matrix <- site_pa %>%
      column_to_rownames("EVENT_DATE_YEAR") %>%
      as.data.frame()
    
    if (!all(rowSums(site_matrix) == 0)) {
      beta_core <- betapart.core(site_matrix)
      beta_res <- beta.pair(beta_core, index.family = "sor")
      
      sor_mat <- as.matrix(beta_res$beta.sor)
      sim_mat <- as.matrix(beta_res$beta.sim)
      sne_mat <- as.matrix(beta_res$beta.sne)
      years <- sort(as.numeric(rownames(sor_mat)))
      
      beta_trends <- tibble(
        Year1 = years[-length(years)],
        Year2 = years[-1],
        beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
        beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
        beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
      ) %>%
        pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
                     names_to = "Component", values_to = "Dissimilarity") %>%
        mutate(SITE_PARENT_NAME = site)
      
      site_beta_trends[[site]] <- beta_trends
    }
  }
}
## Processing site: Battersea 
## Processing site: Greenwich 
## Processing site: Kew 
## Processing site: Richmond 
## Processing site: West Thurrock 
## Processing site: Stanford-le-Hope Beach 
## Processing site: Denton Wharf
beta_trends_site <- bind_rows(site_beta_trends) %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))
# R² and p-values
lm_stats_list <- beta_trends_site %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  summarise(lm_model = list(lm(Dissimilarity ~ Year2)), .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))
  ) %>%
  select(SITE_PARENT_NAME, Component, r2, p)

beta_trends_annotated <- beta_trends_site %>%
  left_join(lm_stats_list, by = c("SITE_PARENT_NAME", "Component")) %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  slice_tail(n = 1) %>%  # one label per facet/component
  ungroup() %>%
  mutate(label = paste0("R² = ", r2, ", p = ", p))

ggplot(beta_trends_site, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 1.5) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", ncol = 4) +
  scale_color_manual(values = c("Sorensen" = "darkorchid",
                                "Turnover" = "steelblue",
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year beta diversity trends by site",
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom")

top_sites_per_zone <- lengths_thames_filtered %>%
  group_by(TOP_TIER_SITE, SITE_PARENT_NAME) %>%
  summarise(total_samples = n(), .groups = "drop") %>%
  filter(!is.na(TOP_TIER_SITE)) %>%
  mutate(TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower"))) %>%
  group_by(TOP_TIER_SITE) %>%
  slice_max(order_by = total_samples, n = 3, with_ties = FALSE) %>%
  arrange(TOP_TIER_SITE, desc(total_samples))

top_sites_data_l <- lengths_thames_filtered %>%
  semi_join(top_sites_per_zone, by = c("TOP_TIER_SITE", "SITE_PARENT_NAME"))

length_stats_site <- top_sites_data_l %>%
  group_by(EVENT_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop") %>%
  arrange(EVENT_YEAR, SITE_PARENT_NAME)

length_labels <- length_stats_site %>%
  nest(data = -SITE_PARENT_NAME) %>%
  mutate(
    model_mean = map(data, ~ lm(mean_length ~ EVENT_YEAR, data = .x)),
    model_median = map(data, ~ lm(median_length ~ EVENT_YEAR, data = .x)),
    stats_mean = map(model_mean, tidy),
    stats_median = map(model_median, tidy)) %>%
  transmute(
    SITE_PARENT_NAME,
    label = pmap(list(SITE_PARENT_NAME, stats_mean, stats_median), function(zone, m, md) {
      slope_mean <- m[m$term == "EVENT_YEAR", "estimate"]
      p_mean <- m[m$term == "EVENT_YEAR", "p.value"]
      slope_median <- md[md$term == "EVENT_YEAR", "estimate"]
      p_median <- md[md$term == "EVENT_YEAR", "p.value"]
      paste0(
        zone, "\n",
        "Mean: ", round(slope_mean, 2), " mm/year (p=", signif(p_mean, 2), ")\n",
        "Median: ", round(slope_median, 2), " mm/year (p=", signif(p_median, 2), ")")})) %>%
  unnest(label)

ggplot(length_stats_site, aes(x = EVENT_YEAR)) +
  geom_line(aes(y = mean_length, color = "Mean", linetype = "Mean"), size = 1) +
  geom_point(aes(y = mean_length, color = "Mean")) +
  geom_smooth(aes(y = mean_length, color = "Mean", linetype = "Mean"), method = "lm", se = FALSE) +
  
  geom_line(aes(y = median_length, color = "Median", linetype = "Median"), size = 1) +
  geom_point(aes(y = median_length, color = "Median")) +
  geom_smooth(aes(y = median_length, color = "Median", linetype = "Median"), method = "lm", se = FALSE) +
  
  scale_color_manual(values = c("Mean" = "blue", "Median" = "darkorange")) +
  scale_linetype_manual(values = c("Mean" = "solid", "Median" = "dashed")) +
  
  facet_wrap(~SITE_PARENT_NAME, labeller = as_labeller(deframe(length_labels)), scales = "free_y") +
  
  labs(
    title = "Mean and median fish length by site (seine net)",
    x = "Year",
    y = "Fish length (mm)",
    color = "Statistic",
    linetype = "Statistic") +
  theme_minimal() +
  theme(legend.position = "top")

## Nesting season within zones and sites
lengths_seasonal_ea <- top_sites_data_l %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    season = case_when(
      EVENT_MONTH %in% 1:6 ~ "Early",
      EVENT_MONTH %in% 7:12 ~ "Late")) %>%
  filter(!is.na(season))

#By site
model_lsmean_site <- lm(FISH_LENGTH ~ season * SITE_PARENT_NAME + EVENT_YEAR, data = lengths_seasonal_ea)

lsmeans_site <- emmeans(model_lsmean_site, ~ season * SITE_PARENT_NAME) %>%
  as.data.frame()

ggplot(lsmeans_site, aes(x = season, y = emmean, fill = season)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  labs(title = "LSMean Fish Length by Season and Site") +
  theme_minimal()

# Species
species_season_diffs_site <- lengths_seasonal_ea %>%
  filter(!is.na(SITE_PARENT_NAME)) %>%
  group_by(SPECIES_NAME, SITE_PARENT_NAME) %>%
  filter(n_distinct(season) == 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(
    estimate_diff = estimate,
    se = SE,
    pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs_site %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  labs(
    title = "Significant seasonal fish length shifts by site and species",
    subtitle = "LSMean length difference (Late − Early), p < 0.05",
    x = "Species",
    y = "Length shift (mm)",
    fill = "Direction") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.y = element_text(size = 6),
    legend.position = "top")

# Load data
guilds <- read_csv("species_list_guilds.csv")

top_sites_data_g <- guilds %>%
  semi_join(top_sites_per_zone, by = c("TOP_TIER_SITE", "SITE_PARENT_NAME"))

# Where are the hotspots of ecological change?
e_guild_species_site <- top_sites_data_g %>%
  group_by(EVENT_DATE_YEAR, ECOLOGY_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

e_guild_trends_site <- e_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(ECOLOGY_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

e_guild_trends_site_long <- e_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
e_guild_site_results <- e_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

e_guild_site_results <- e_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(e_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in ecology guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Where are the hotspots of ecological change?
f_guild_species_site <- top_sites_data_g %>%
  group_by(EVENT_DATE_YEAR, FEEDING_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

f_guild_trends_site <- f_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(FEEDING_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

f_guild_trends_site_long <- f_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
f_guild_site_results <- f_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

f_guild_site_results <- f_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(f_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in feeding guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

r_guild_species_site <- top_sites_data_g %>%
  group_by(EVENT_DATE_YEAR, REPRODUCTIVE_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

r_guild_trends_site <- r_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(REPRODUCTIVE_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

r_guild_trends_site_long <- r_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
r_guild_site_results <- r_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

r_guild_site_results <- r_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(r_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in reproductive guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Define FUNCTIONAL_ENTITY per site and year
functional_redundancy_site <- top_sites_data_g %>%
  filter(!is.na(SITE_PARENT_NAME)) %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_")) %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, FUNCTIONAL_ENTITY) %>%
  summarise(Species_Richness = n_distinct(LATIN_NAME), .groups = "drop")

redundancy_site_summary <- functional_redundancy_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Redundant_Functions = sum(Species_Richness > 1),
    Mean_Species_per_Function = mean(Species_Richness),
    Total_Species = sum(Species_Richness),
    .groups = "drop"
  )

# Prepare long format for modeling
redundancy_site_long <- redundancy_site_summary %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR, SITE_PARENT_NAME), names_to = "Metric", values_to = "Value")

# Model function for site-level trends
site_redundancy_model <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) return(empty_result)
  model <- tryCatch(lm(Value ~ EVENT_DATE_YEAR, data = df), error = function(e) return(NULL))
  if (is.null(model)) return(empty_result)
  s <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(s)) return(empty_result)
  slope <- s["EVENT_DATE_YEAR", "Estimate"]
  se <- s["EVENT_DATE_YEAR", "Std. Error"]
  p <- s["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***", p <= 0.01 ~ "**", p <= 0.05 ~ "*", TRUE ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply model per site × redundancy metric
site_redundancy_results <- redundancy_site_long %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_redundancy_model(.x)) %>%
  ungroup() %>%
  filter(!is.na(Slope))

ggplot(site_redundancy_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Functional redundancy trends by site - Thames Estuary",
    subtitle = "Slope ± 95% CI | Linear models of site-level functional metrics",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance"
  ) +
  theme_minimal()

Suggested sampling sites

setwd("C:/Users/bodna/OneDrive - University College London/Documents/PhD/PhD docs")

site_data <- read_excel("locations2.xlsx")

valid_types <- c("Natural", "Degraded", "Restored", "Engineered")
site_data <- site_data %>%
  filter(Type %in% valid_types, !is.na(Lat), !is.na(Long))

m <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron)

for (site_type in valid_types) {
  type_data <- site_data %>% filter(Type == site_type)
  color <- case_when(
    site_type == "Natural" ~ "green",
    site_type == "Degraded" ~ "red",
    site_type == "Restored" ~ "orange",
    site_type == "Engineered" ~ "blue"
  )
  
  m <- m %>%
    addCircleMarkers(
      data = type_data,
      lng = ~Long, lat = ~Lat,
      group = site_type,
      radius = 6,
      color = color,
      fillOpacity = 0.8,
      label = ~Site,
      popup = ~paste0(
        "<b>", Site, "</b><br>",
        "Habitat: ", Habitat, "<br>",
        "Type: ", Type, "<br>",
        "Salinity: ", Salinity, "<br>",
        "Access: ", Access, "<br>",
        "Oversight: ", Oversight
      )
    )
}

m <- m %>%
  addLayersControl(
    overlayGroups = valid_types,
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("green", "red", "orange", "blue"),
    labels = valid_types,
    title = "Site type")

m